####################################
#Diff-in-diff with matching and dual-distance cutoffs - PA figure, dd with dd 
#Traffic and Political Attitudes
#Connor Jerzak & Brian Libgober
#76/276/70, 476: HAS ez-passes  
#79, 80, 81, : NO EZPass 
####################################

########################
#Initial setup 
########################
#set wd
setwd("/Users/cjerzak/Documents/Publications/Traffic/Internals/")

#external libraries 
library(ggplot2)
source("http://peterhaschke.com/Code/multiplot.R") #multiplot

#load in data 
#NJ_data <- read.csv("NJ_precinct_master.csv", header=T)
PA_data <- read.csv("PA_precinct_master.csv", header=T)


#load in the dd_with_dd function 
source("dd_with_randomTest_function.R")

##############################
#set variable values
##############################


#set general specifications for later modifications
my_data <- PA_data 

#outcomes 
outcome_time1_name <- "USPP2000"
outcome_time2_name <- "USPP2004"  #US president (dem) percent 2000, 2004

#treated basis (something having to do with E-ZPasses)
treatment_basis_name <- "EZDist_PA_NEAR_DIST"
#treatment_basis_name <- "EZ_TimeOptimized_PA_Total_Time" 

#control basis (something having to do with non-E-ZPass highway)
control_basis_names <- c("HighwayDist_PA79_NEAR_DIST", "HighwayDist_PA80_NEAR_DIST", "HighwayDist_PA81_NEAR_DIST")
#control_basis_names <- c("TimeOptimized_PA79_Total_Time", "TimeOptimized_PA80_Total_Time", "TimeOptimized_PA81_Total_Time")

#identify matching variables 
matching_variables <- c("cepctwhite", "cepctblack", "cepctwomen", "cnipoverty", "cnpopurban") #income, commute length, number of cars, etc. 

#define cutoffs 
treatment_basis_treated_max <- 250
control_basis_control_max <- 250
treatment_basis_control_min <- 5000
control_basis_treated_min <- 5000
#treatment_basis_treated_max <- 30
#control_basis_control_max <- 30
#treatment_basis_control_min <- 60
#control_basis_treated_min <- 60 

##############################
#single trial 
##############################
initial_results <- dd_with_randomTest(my_data=my_data,
                                     treatment_basis_name=treatment_basis_name, 
                                     control_basis_names=control_basis_names, 
                                     matching_variables=matching_variables, 
                                     outcome_time1_name=outcome_time1_name, 
                                     outcome_time2_name=outcome_time2_name,
                                     treatment_basis_treated_max=treatment_basis_treated_max, 
                                     treatment_basis_control_min=treatment_basis_control_min, 
                                     control_basis_control_max=control_basis_control_max,
                                     control_basis_treated_min=control_basis_treated_min,
                                     match_quietly=F, 
                                     start_browser=F, 
                                     max_perm_numb=10000, 
                                     my_method = "nearest", 
                                     my_distance="mahalanobis", 
                                     my_discard="none", 
                                     my_replace=T)
initial_results$base_result
initial_results$p_value
dim(initial_results$base_matched_dataset)

test2 <- initial_results$base_matched_dataset
dim(test1); dim(test2)

sum(test1$USPP2004)
sum(test2$USPP2004)

#loop to generate figure 
##############################
treatment_basis_control_min <- 10000
control_basis_treated_min <- 10000
max_seq <- seq(300, 2500, length.out=45)
source("dd_with_randomTest_function.R"); results_list <- list(); counter <- 0; for(i in max_seq)
{ 
  counter <- counter + 1
  print(counter)
  results_list[[counter]] <- dd_with_randomTest(my_data=my_data,
                                               treatment_basis_name=treatment_basis_name, 
                                               control_basis_names=control_basis_names, 
                                               outcome_time1_name=outcome_time1_name, 
                                               outcome_time2_name=outcome_time2_name,
                                               matching_variables=matching_variables, 
                                               treatment_basis_treated_max=i, 
                                               treatment_basis_control_min=control_basis_treated_min, 
                                               control_basis_control_max=i,
                                               control_basis_treated_min=control_basis_treated_min,
                                               match_quietly=T, 
                                               start_browser=F, 
                                               max_perm_numb=10000, 
                                               my_method = "nearest", 
                                               my_distance="mahalanobis", 
                                               my_discard="none")
}

####################
#save and plot results
####################
#save results as appropriate objects 
unlisted_results <- unlist(results_list, recursive=T, use.names=T) #$base_result
base_results <- unlisted_results[which(names(unlisted_results)=="base_result")]
p_values <- unlisted_results[which(names(unlisted_results)=="p_value")]

#setup for ggplot 2 
gg_data <- data.frame(cbind(max_seq, base_results, p_values))

g1 <- ggplot(gg_data, aes(x=max_seq, y=base_results)) + 
  geom_point() + 
  ylab("DiD effect (in % points)") + 
  xlab("Maximum distance from E-ZPass or Control Highway") + 
  ggtitle("DiD Effect (in % points) vs. Maximum Distance") + 
  theme(text = element_text(size=13)) 

g2 <- ggplot(gg_data, aes(x=max_seq, y=p_values)) + 
  geom_point() + 
  geom_hline(yintercept = 0.05, line_type="dotdash") + 
  ylab("Estimated exact p-values") + 
  xlab("Maximum distance from E-ZPass or Control Highway") + 
  ggtitle("Estimated Exact P-values vs. Maximum Distance") + 
  theme(text = element_text(size=13)) 

#png(filename=save_name, width=pixel_width, height=pixel_height)
multiplot(g1, g2, layout=matrix(c(1:2), ncol=2, byrow=TRUE))
#dev.off()
